This notebook details the process of prediction from which homework a notebook came after featurizing the notebook using the top down method. This is done by gathering all templates in each notebook after running the algorithm, then using countvectorizer to featurize the notebooks, and finally using random forests to make the prediction
In [2]:
import sys
home_directory = '/dfs/scratch2/fcipollone'
sys.path.append(home_directory)
import numpy as np
from nbminer.notebook_miner import NotebookMiner
hw_filenames = np.load('../homework_names_jplag_combined_per_student.npy')
hw_notebooks = [[NotebookMiner(filename) for filename in temp[:59]] for temp in hw_filenames]
In [3]:
from nbminer.pipeline.pipeline import Pipeline
from nbminer.features.features import Features
from nbminer.preprocess.get_ast_features import GetASTFeatures
from nbminer.preprocess.get_imports import GetImports
from nbminer.preprocess.resample_by_node import ResampleByNode
from nbminer.encoders.ast_graph.ast_graph import ASTGraphReducer
from nbminer.preprocess.feature_encoding import FeatureEncoding
from nbminer.encoders.cluster.kmeans_encoder import KmeansEncoder
from nbminer.results.similarity.jaccard_similarity import NotebookJaccardSimilarity
from nbminer.results.prediction.corpus_identifier import CorpusIdentifier
a = Features(hw_notebooks[0], 'hw0')
a.add_notebooks(hw_notebooks[1], 'hw1')
a.add_notebooks(hw_notebooks[2], 'hw2')
a.add_notebooks(hw_notebooks[3], 'hw3')
a.add_notebooks(hw_notebooks[4], 'hw4')
a.add_notebooks(hw_notebooks[5], 'hw5')
gastf = GetASTFeatures()
rbn = ResampleByNode()
gi = GetImports()
fe = FeatureEncoding()
ke = KmeansEncoder(n_clusters = 70)
ci = CorpusIdentifier()
pipe = Pipeline([gastf, rbn, gi, fe, ke, ci])
a = pipe.transform(a)
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f38efd1c198>
354
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x7f3904ee06d8>
354
<nbminer.preprocess.get_imports.GetImports object at 0x7f38efd1c208>
354
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x7f38efd232e8>
354
<nbminer.encoders.cluster.kmeans_encoder.KmeansEncoder object at 0x7f38efd23b00>
354
<nbminer.results.prediction.corpus_identifier.CorpusIdentifier object at 0x7f38efd1d0b8>
354
In [4]:
import tqdm
X, y = ci.get_data_set()
similarities = np.zeros((len(X), len(X)))
for i in tqdm.tqdm(range(len(X))):
for j in range(len(X)):
if len(set.union(set(X[i]), set(X[j]))) == 0:
continue
similarities[i][j] = len(set.intersection(set(X[i]), set(X[j]))) / (len(set.union(set(X[i]), set(X[j]))))
100%|██████████| 354/354 [00:06<00:00, 54.60it/s]
The first measure that we can use to determine if something reasonable is happening is to look at, for each homework, the average similarity of two notebooks both pulled from that homework, and the average similarity of a notebook pulled from that homework and any notebook in the corpus not pulled from that homework. These are printed below
In [6]:
def get_avg_inter_intra_sims(X, y, val):
inter_sims = []
intra_sims = []
for i in range(len(X)):
for j in range(i+1, len(X)):
if y[i] == y[j] and y[i] == val:
intra_sims.append(similarities[i][j])
else:
inter_sims.append(similarities[i][j])
return np.array(intra_sims), np.array(inter_sims)
for i in np.unique(y):
intra_sims, inter_sims = get_avg_inter_intra_sims(X, y, i)
print('Mean intra similarity for hw',i,'is',np.mean(intra_sims),'with std',np.std(intra_sims))
print('Mean inter similarity for hw',i,'is',np.mean(inter_sims),'with std',np.std(inter_sims))
print('----')
Mean intra similarity for hw 0 is 0.3151918865082649 with std 0.15305001930552434
Mean inter similarity for hw 0 is 0.34064003395119835 with std 0.15939630105028213
----
Mean intra similarity for hw 1 is 0.3184891524399751 with std 0.1524151555252305
Mean inter similarity for hw 1 is 0.3405471983084609 with std 0.15942725986191494
----
Mean intra similarity for hw 2 is 0.3852052801437633 with std 0.18604092125516283
Mean inter similarity for hw 2 is 0.3386687830624319 with std 0.15827393638869489
----
Mean intra similarity for hw 3 is 0.33052549366297923 with std 0.12467841412289758
Mean inter similarity for hw 3 is 0.3402083110313084 with std 0.16013806695440713
----
Mean intra similarity for hw 4 is 0.35523939719664244 with std 0.16688935834578167
Mean inter similarity for hw 4 is 0.3395124826793897 with std 0.15903923159007144
----
Mean intra similarity for hw 5 is 0.3456541975320804 with std 0.16882797566916016
Mean inter similarity for hw 5 is 0.33978235723305217 with std 0.158999942991696
----
In [22]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = 5, 10
def get_all_sims(X, y, val):
sims = []
sims_actual = []
for i in range(len(X)):
for j in range(i+1, len(X)):
if y[i] == val or y[j] == val:
sims.append(similarities[i][j])
if y[i] == val and y[j] == val:
sims_actual.append(similarities[i][j])
return sims, sims_actual
fig, axes = plt.subplots(6,2)
for i in range(6):
axes[i,0].hist(get_all_sims(X,y,i)[0], bins=30)
axes[i,1].hist(get_all_sims(X,y,i)[1], bins=30)
While the above results are helpful, it is better to use a classifier that uses more information. The setup is as follows:
In [ ]:
In [ ]:
In [10]:
import sklearn
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
X, y = ci.get_data_set()
countvec = sklearn.feature_extraction.text.CountVectorizer()
X_list = [" ".join(el) for el in X]
countvec.fit(X_list)
X = countvec.transform(X_list)
p = np.random.permutation(len(X.todense()))
X = X.todense()[p]
y = np.array(y)[p]
clf = sklearn.ensemble.RandomForestClassifier(n_estimators=400, max_depth=3)
scores = cross_val_score(clf, X, y, cv=10)
print(scores)
print(np.mean(scores))
[0.88888889 0.97222222 0.91666667 0.91666667 0.94444444 1.
0.91666667 0.88888889 0.94444444 1. ]
0.9388888888888889
In [18]:
X.shape
Out[18]:
(354, 70)
In [17]:
clf.fit(X,y)
fnames= countvec.get_feature_names()
clfi = clf.feature_importances_
sa = []
for i in range(len(clfi)):
sa.append((clfi[i], fnames[i]))
sra = [el for el in reversed(sorted(sa))]
import astor
for temp in sra:
temp = temp[1]
print(temp)
for i in range(3):
print ('\t',astor.to_source(ke.templates.get_random_example(temp)))
template_25
var = pd.read_excel('string', sheetname='string', header=None)
var = pd.read_excel(var + 'string' + 'string')
var = pd.read_excel('string', 'string', index_col=0, header=None)
template_7
var = pd.read_csv('string', index_col=None)
var = pd.read_csv('string')
var = pd.read_csv('string', index_col='string')
template_67
pd.read_csv('string').head(20)
pd.read_csv('string', skiprows=[3, 4, 6]).head()
pd.read_csv('string').head(20)
template_17
var = pd.Series([632, 1638, 569, 115])
var = pd.Series([int(var) for var in var])
var = pd.Series([632, 1638, 569, 115], index=['string', 'string', 'string',
'string'])
template_63
var = [(var.endswith('string') and var >= 1000) for var, var in var[[
'string', 'string']].values]
[var.endswith('string') for var in var.index]
var = [var.endswith('string') for var in var.phylum]
template_64
map = folium.Map(location=[46.8, 8], tiles='string', zoom_start=8)
var = folium.Map(location=[46.57, 8], zoom_start=8)
var = folium.Map(location=[46.5966, 7.9761], zoom_start=7)
template_56
pd.cut(var.age, [20, 30, 40, 50, 60, 70, 80, 90], right=False)[:30]
pd.cut(var.age, [20, 40, 60, 80, 90], labels=['string', 'string', 'string',
'string'])[:30]
pd.cut(var.age, [20, 40, 60, 80, 90], labels=['string', 'string', 'string',
'string'])[:30]
template_1
var[:3]
var = ['string', 'string', 'string']
var
template_10
var.agg(np.mean).head()
var.reindex(var, fill_value='string', columns=['string']).head()
var.head()
template_62
pd.concat([var, var], axis=1, join='string').head()
pd.concat([var, var], keys=['string', 'string']).head()
pd.concat([var, var], axis=1).head()
template_33
np.concatenate([np.random.random(5), np.random.random(5)])
np.r_[np.random.random(5), np.random.random(5)]
np.c_[np.random.random(5), np.random.random(5)]
template_5
var = var.count().unstack('string')
var = var['string'].unstack('string')
var = var['string'].unstack('string')
template_69
var = WordCloud().generate(var)
var.generate(var)
var = WordCloud().generate('string'.join(var))
template_39
var = var.fillna(0)
var.fillna(method='string')
var['string'].fillna('string', inplace=True)
template_27
var = pd.DataFrame(dict(id=list(range(3)) + list(range(3)), score=np.random
.random(size=6)))
var = pd.DataFrame(dict(id=list(range(3)) + list(range(3)), score=np.random
.random(size=6)))
def filter_tokens(row_lower_case_tokens):
var = list(filter(lambda w: 'string' in var or var.isalpha() or var.
isdigit(), var))
var = list(filter(lambda w: 'string' == var or len(var) > 1, var))
return list(filter(lambda w: var not in var, var))
template_6
var.mean().add_suffix('string').head()
var[['string']].mean().head()
var.mean().head()
template_2
pd.concat([var, var], axis=0).index.is_unique
var = pd.concat([var, var, var], axis=0)
pd.concat([var, var], keys=['string', 'string']).index.is_unique
template_59
pd.concat(dict(patient1=var, patient2=var), axis=1).head()
pd.concat(dict(patient1=var, patient2=var), axis=1).head()
pd.concat(dict(patient1=var, patient2=var), axis=1).head()
template_26
var.mean()
var = var.weight.mean()
def generate_X_std(players, features):
var = var[var]
return var.sub(var.mean(), axis=1).div(var.std(), axis=1)
template_57
def describe_doc_frequencies():
var = list(var.dfs.values())
plt.plot(var)
print('string'.format(np.min(var), np.mean(var), np.max(var)))
def jitter(x):
var = 0.01 * (max(var) - min(var))
return var + np.random.randn(len(var)) * var
var = lambda x: var.max() - var.min()
template_12
var.drop(['string', 'string'], axis=1)
var.drop('string', axis=1, inplace=True)
var = var.drop('string', axis=1)
template_4
var = var.dropna(subset=['string'])
var = var.dropna()
var.dropna()
template_60
var = var.groupby(['string', 'string', 'string'])['string'].sum() / var[
'string'].sum() * 100
var = var[['string', 'string']].groupby('string').sum()
var = var.groupby(['string']).sum().reset_index().rename(columns={'string':
'string', 'string': 'string'})
template_48
var.treat.replace({'string': 0, 'string': 1, 'string': 2})
var.treat.replace({'string': 0, 'string': 1, 'string': 2})
var.treat.replace({'string': 0, 'string': 1, 'string': 2})
template_20
var = pd.DataFrame({'string': [0.0, 0.0]}, index=['string', 'string'])
var = pd.DataFrame(var, columns=['string', 'string'])
var = pd.DataFrame({'string': [632, 1638, 569, 115, 433, 1130, 754, 555],
'string': [1, 1, 1, 1, 2, 2, 2, 2], 'string': ['string', 'string',
'string', 'string', 'string', 'string', 'string', 'string']})
template_55
var = set(opinion_lexicon.positive())
var = list(set(var))
var = set(stopwords.words('string'))
template_58
plt.axis('string')
plt.axis('string')
plt.axis('string')
template_14
var = var.groupby('string')
var = var.groupby(var.patient)
var.groupby(by='string').apply(var).dropna().sort_values()
template_35
pd.merge(var, var)
pd.merge(var, var)
var = pd.merge(var[['string', 'string', 'string', 'string']], var[['string'
]], left_index=True, right_index=True, how='string')
template_24
var[(var.survived == True) & var.boat.notnull()].shape[0] / var[var.
survived == True].shape[0]
var[var.notnull()]
var[var.notnull()]
template_41
var = var[~pd.isnull(var['string']) | ~pd.isnull(var['string'])]
var.isnull()
var = var[var.rater2.isnull() & ~var.rater1.isnull()]
template_16
var().run_line_magic('string', 'string')
var().run_line_magic('string', 'string')
var().run_line_magic('string', 'string')
template_0
HTML(filename='string')
var['string'] = pd.to_datetime(var['string'])
var = dict(zip(var.keys(), np.zeros(len(var.keys()))))
template_47
var.fillna(0)[var.pclass == 3].fare.describe()
var['string'].value_counts().describe()
var.describe()
template_9
var['string'] = var.treat.astype('string')
var = var.player + var.team + var.year.astype(str)
var = var.apply(var).astype('string')
template_19
var.treat.value_counts()
var.treat.value_counts()
var = var.University.value_counts().value_counts()
template_29
var.name.isnull().sum()
var = sum(var['string'])
print(var.survived.sum() / var.shape[0])
template_65
try:
var = pickle.load(var, open('string', 'string'))
except:
var = pd.read_csv('string', parse_dates=['string'])
pickle.dump(var, open('string', 'string'))
var = BeautifulSoup(open(var, encoding='string'), 'string')
folium.TopoJson(open(var), 'string', name='string').add_to(var)
template_30
var = var.index.map(lambda x: 'string'.join(var.split('string')[:3]))
def combineDataYear(new_data, old_data, semestre, year):
var = var.split('string')[1] + 'string' + var.split('string')[2]
for var in var['string'].tolist():
if var in var['string'].tolist():
var[var].loc[var['string'] == var] = True
var['string'].loc[var['string'] == var] = var
else:
var['string'].loc[var['string'] == var] = var
var['string'].loc[var['string'] == var] = var
var = pd.concat([var, var.loc[var['string'] == var]],
ignore_index=True)
var.set_index(['string'], inplace=True, drop=False)
return var
var = var.index.map(lambda x: 'string'.join(var.split('string')[:3]))
template_43
var = var.value.copy()
var = var.copy()
var = var.copy()
template_45
var.show()
plt.show()
plt.show()
template_28
var.sort_values(ascending=False)
var.sort_values(ascending=False)
var.hr.sort_values()
template_36
var.hist(figsize=(15, 8), color='string')
var.fare.hist(bins=30)
var.seg_length.hist(bins=500)
template_46
for var in range(11, 0, -1):
var.append(var(range(12), var))
for var in range(len(var)):
var = pd.read_csv(var[var], parse_dates=True, infer_datetime_format=
True, index_col='string')
var = var[['string', 'string']]
var.append(var)
var = [(0) for var in range(var.shape[0])]
template_37
plt.plot(range(15), var, label='string')
var = var.plot(kind='string', title='string')
plt.plot(var, label='string')
template_50
var = var.apply(np.average)
var.position = var.position.apply(lambda x: str(var))
var = var[var['string'].apply(var) == True]
template_53
sns.set_context('string')
sns.set_context('string')
sns.set_context('string')
template_51
var = var.set_index(['string', 'string'])
var.set_index('string', inplace=True)
var.set_index('string', inplace=True)
template_13
var.loc[var.Canton.isin(['string', 'string', 'string', 'string', 'string',
'string', 'string']), 'string'] = 'string'
var = var.ix[~var['string'].isin(var)]
var = var[~var.index.isin(var.canton)]
template_61
[var for var in var.type.unique() if var.find('string') == -1]
var['string'].unique()
np.sort(var['string'].unique()), np.sort(var['string'].unique())
template_3
for var in var:
var = var[var['string'] == var]['string']
print(var)
print(stats.ks_2samp(var, var['string']))
print('string')
for var, var in var:
print('string', var)
print('string', var)
for var, var in var:
print('string', var)
print('string', var)
template_23
print('string'.format(int(var)))
print(var['string'])
print(var)
template_54
plt.ylabel('string')
plt.ylabel('string', fontweight='string')
plt.ylabel('string')
template_52
for var in pycountry.countries:
var += 1
var = None
try:
var = var.official_name
except:
var += 1
var.append([var.alpha_2, var.alpha_3, var.name, var])
for var, var in var.items():
for var in var:
if var not in var.keys():
var[var] = []
var[var].append(var[var])
for var in range(var.shape[0]):
var.append(var(var.iloc[var]['string'], var.iloc[var]['string']))
template_40
var(var)
var = set([var for var in var['string'] if not var(var)])
var(var)
template_8
var.query('string').groupby('string').sex.count() / var.groupby('string'
).sex.count()
def lookup_country(email):
var = {}
words = re.findall('string', var)
for var in var:
var = 0
var += var.count(var['string'])
if var['string'] == 'string':
var += words.count('string')
var += words.count('string')
var += words.count('string')
var += words.count('string')
elif var['string'] == 'string':
var += words.count('string')
var += words.count('string')
var += words.count('string')
var += words.count('string')
if var != 0:
var[var['string']] = var
if var:
var = max(var, key=var.get)
return var
else:
return
def lookup_country(email):
var = {}
words = re.findall('string', var)
for var in var:
var = 0
var += var.count(var['string'])
if var['string'] == 'string':
var += words.count('string')
var += words.count('string')
var += words.count('string')
var += words.count('string')
elif var['string'] == 'string':
var += words.count('string')
var += words.count('string')
var += words.count('string')
var += words.count('string')
if var != 0:
var[var['string']] = var
if var:
var = max(var, key=var.get)
return var
else:
return
template_32
print('string' + str(var.shape[0]) + 'string' + str(var.shape[0] * 100 /
var.shape[0]) + 'string')
def getAllData(startyear, endyear, semester):
var = var(str(var) + 'string' + str(var + 1), var)
for var in range(var, var):
var = pd.merge(var, var(str(var + 1) + 'string' + str(var + 2), var
), how='string', on=['string', 'string', 'string'])
var.fillna(value='string')
return var
print('string' + str(var) + 'string' + str(var))
template_49
def get_classifier_score(n_estimators, max_depth):
var = var(var, var, n_estimators=var, max_depth=var)
var, var = var(var, var, var)
return var, var, var, var
for var in var:
var = var[1]
var.append({'string': var[0], 'string': var(var, 'string').size / var,
'string': var(var, 'string').size / var})
var = [var for var in var if not numpy.isnan(var.var()) and not var.var() ==
0.0]
template_21
for var in range(len(var)):
if var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
for var in range(len(var)):
if var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
elif var[var.index == var]['string'][var] == 'string':
var.append(var[var.index == var][['string', 'string']])
def get_key_and_value(url):
"""string"""
var = requests.get(var)
var = BeautifulSoup(var.text, 'string')
var = var.find_all('string')
var = var[1].attrs['string']
var = 'string' + var
var = requests.get(var)
var = BeautifulSoup(var.text, 'string')
var = var.contents[0]
var = var.body
var = var.find_all(['string'])
var = var[1].find_all('string')
var = var[1].find_all('string')
var = var[1].find_all('string')
var = []
var = []
var = []
var = []
var = []
var = 0
var = []
var = []
for var in var:
if var['string']:
var.append(var['string'])
var.append(var['string'])
elif var['string'].startswith('string'):
continue
for var in var:
for var in var:
var = int(var['string'])
if var[0].startswith('string'):
var.append(var)
elif 'string' in var:
var.append(var)
elif 'string' in var:
var.append(var)
elif 'string' in var:
var.append(var)
elif 'string' in var or 'string' in var:
var.append(var)
elif 'string' in var:
var = var
else:
continue
var = [var[0], var[6]]
var = [var[0], var[1], var[2]]
for var in var:
if var['string']:
var.append(var['string'])
var.append(var)
var.append(var[::-1])
var.append(var)
var.append(var)
var.append(var)
var.append(var)
return var, var, var
template_31
print('string', len(var), 'string', (len(var) - len(var)) / len(var))
for var in range(len(var)):
var = var[var].select('string')
print('string', var + 1, 'string', var[var]['string'], 'string', len(
var), 'string')
for var in range(len(var)):
if len(var[var].contents) == 0:
print('string', 'string', var[var]['string'])
else:
print(var[var].contents[0], 'string', var[var]['string'])
print('string')
def add_students(html_res, year, semester):
"""string"""
print('string', var, 'string', var)
var = BeautifulSoup(var, 'string')
var = var.body.table
var = []
var = var.find_all('string')
for var in var:
var = var.find_all('string')
var = [var.text.strip() for var in var]
var.append(var)
var = pd.DataFrame(var)
print('string', len(var))
var = len(var)
if len(var):
var = var.drop([1, 2, 3, 4, 5, 6, 7, 8, 9, 11], axis=1)
var.columns = ['string', 'string']
var = pd.Series([int(var)] * len(var))
var['string' + str(var)] = var
var['string'] = pd.to_numeric(var['string'], errors='string')
var = var.dropna(subset=['string'])
print('string', var - len(var))
return var
template_42
def student_is_valid(df, sciper):
var = var(var, var)
var = len(var(var, 1)) > 0
var = len(var(var, 2)) > 0
var = var(var)
var = var(var)
return var and var and var and var
for var in var:
var = var(var)
var = requests.get('string' + var + 'string')
var = BeautifulSoup(var.content, 'string')
var = var(var)
if not var:
var = var(var)
var = requests.get('string' + var + 'string')
var = BeautifulSoup(var.content, 'string')
var = var(var)
if not var:
var = var(var)
for var in var:
var = requests.get('string' + var + 'string')
var = BeautifulSoup(var.content, 'string')
var = var(var)
if not var == []:
break
if var == 'string':
var = []
var[var] = var
var = [{var(var.name): var(var.name), var(var.alpha_2): var(var.name), var(
var.alpha_3): var(var.name)} for var in pycountry.countries]
template_38
def build_dataframe(soup, list_of_columns):
var = var.select('string')
var = []
for var in var:
var = []
for var in var[2:]:
var = var.contents[var[var]].contents
if var == []:
var.append('string')
else:
var.append(var[0])
var.append(var)
var = dict(zip(var, var))
var = pd.DataFrame(var)
return var
for var in var.index.drop_duplicates():
var = var.loc[var]
var = False
var = str(var).split('string')
var = 'string'.join(var[0:-1])
var = var[-1]
if var == 'string':
var = {'string': 'string', 'string': var['string'].sum()}
var = var.append(pd.Series(var), ignore_index=True)
var.drop(var, inplace=True)
continue
if var == 'string':
var = {'string': 'string', 'string': var['string'].sum()}
var = var.append(pd.Series(var), ignore_index=True)
var.drop(var, inplace=True)
continue
if var == 'string':
var = {'string': 'string', 'string': var['string'].sum()}
var = var.append(pd.Series(var), ignore_index=True)
var.drop(var, inplace=True)
continue
for var in var:
if var in str(var):
var = {'string': var, 'string': var['string'].sum()}
var = var.append(pd.Series(var), ignore_index=True)
var.drop(var, inplace=True)
var = True
break
if var == True:
continue
var['string'] = var
var = requests.get(var, params=var)
var = BeautifulSoup(var.content, 'string')
if 'string' not in var.prettify():
for var in var:
if var in str(var.findAll()):
var = {'string': var, 'string': float(var['string'].sum())}
var = var.append(pd.Series(var), ignore_index=True)
var.drop(var, inplace=True)
break
else:
var['string'] = var
var = requests.get(var, params=var)
var = BeautifulSoup(var.content, 'string')
if 'string' in var.prettify():
var = pd.concat([var, var])
else:
for var in var:
if var in str(var.findAll()):
var = {'string': var, 'string': float(var['string'].sum())}
var = var.append(pd.Series(var), ignore_index=True)
var.drop(var, inplace=True)
break
for var, var in var.iterrows():
var = set(wordpunct_tokenize(var.text))
for var in var:
if var in var:
var[var].append(var)
elif var in var:
var[var].append(var)
elif var in var:
var[var].append(var)
template_68
var[var][var[var]['string'].str.contains('string')]
var.loc[var.Institution.str.contains('string') == True, 'string'] = 'string'
var.loc[var.Institution.str.contains('string') == True, 'string'] = 'string'
template_66
for var, var in enumerate(var['string']):
var = np.where(var['string'] == var)
var = np.where(var['string'] == var)
var = np.where(var['string'] == var)
var = np.where(var['string'] == var)
var = np.where(var['string'] == var)
var = var['string'].ix[var[0][0]]
var = var['string'].ix[var[0][0]]
var = min(int(var[0:4]), int(var[0:4]))
if int(var[0:4]) < int(var[0:4]):
var[var] -= 6
var = 0
var = 0
if len(var[0]) != 0 or len(var[0]) != 0:
if len(var[0]) != 0:
var = var['string'].ix[var[0][len(var[0]) - 1]]
var = int(var[0:4])
if len(var[0]) != 0:
var = var['string'].ix[var[0][len(var[0]) - 1]]
var = int(var[0:4])
if var > var:
var[var] -= 6
var = max(var, var)
else:
var = var['string'].ix[var[0][len(var[0]) - 1]]
var = var['string'].ix[var[0][len(var[0]) - 1]]
if var['string'].isin([var]).values.any():
var = var['string'].ix[var[0][len(var[0]) - 1]]
var = max(int(var[0:4]), int(var[0:4]), int(var[0:4]))
if int(var[0:4]) > int(var[0:4]) or int(var[0:4]) > int(var[0:4]):
var[var] -= 6
else:
var = max(int(var[0:4]), int(var[0:4]))
if int(var[0:4]) > int(var[0:4]):
var[var] -= 6
var[var] += 12 * (var - var + 1)
for var, var in enumerate(var['string']):
var = np.where(var['string'] == var)
var = np.where(var['string'] == var)
var = np.where(var['string'] == var)
var = np.where(var['string'] == var)
var = np.where(var['string'] == var)
var = var['string'].ix[var[0][0]]
var = var['string'].ix[var[0][0]]
var = min(int(var[0:4]), int(var[0:4]))
if int(var[0:4]) < int(var[0:4]):
var[var] -= 6
var = 0
var = 0
if len(var[0]) != 0 or len(var[0]) != 0:
if len(var[0]) != 0:
var = var['string'].ix[var[0][len(var[0]) - 1]]
var = int(var[0:4])
if len(var[0]) != 0:
var = var['string'].ix[var[0][len(var[0]) - 1]]
var = int(var[0:4])
if var > var:
var[var] -= 6
var = max(var, var)
else:
var = var['string'].ix[var[0][len(var[0]) - 1]]
var = var['string'].ix[var[0][len(var[0]) - 1]]
if var['string'].isin([var]).values.any():
var = var['string'].ix[var[0][len(var[0]) - 1]]
var = max(int(var[0:4]), int(var[0:4]), int(var[0:4]))
if int(var[0:4]) > int(var[0:4]) or int(var[0:4]) > int(var[0:4]):
var[var] -= 6
else:
var = max(int(var[0:4]), int(var[0:4]))
if int(var[0:4]) > int(var[0:4]):
var[var] -= 6
var[var] += 12 * (var - var + 1)
for var, var in enumerate(var['string']):
var = np.where(var['string'] == var)
var = np.where(var['string'] == var)
var = np.where(var['string'] == var)
var = np.where(var['string'] == var)
var = np.where(var['string'] == var)
var = var['string'].ix[var[0][0]]
var = var['string'].ix[var[0][0]]
var = min(int(var[0:4]), int(var[0:4]))
if int(var[0:4]) < int(var[0:4]):
var[var] -= 6
var = 0
var = 0
if len(var[0]) != 0 or len(var[0]) != 0:
if len(var[0]) != 0:
var = var['string'].ix[var[0][len(var[0]) - 1]]
var = int(var[0:4])
if len(var[0]) != 0:
var = var['string'].ix[var[0][len(var[0]) - 1]]
var = int(var[0:4])
if var > var:
var[var] -= 6
var = max(var, var)
else:
var = var['string'].ix[var[0][len(var[0]) - 1]]
var = var['string'].ix[var[0][len(var[0]) - 1]]
if var['string'].isin([var]).values.any():
var = var['string'].ix[var[0][len(var[0]) - 1]]
var = max(int(var[0:4]), int(var[0:4]), int(var[0:4]))
if int(var[0:4]) > int(var[0:4]) or int(var[0:4]) > int(var[0:4]):
var[var] -= 6
else:
var = max(int(var[0:4]), int(var[0:4]))
if int(var[0:4]) > int(var[0:4]):
var[var] -= 6
var[var] += 12 * (var - var + 1)
template_44
for var in range(2007, 2017):
for var in range(1, 7):
var = var(period='string' + str(var), year=str(var) + 'string' +
str(var + 1))
for var in var:
var = var[7]
id = int(var[10])
if var == 'string':
var = var[0]
if var not in ['string', 'string']:
raise Exception('string' + var + 'string' + str(var) +
'string' + str(var) + 'string' + str(id))
var = int(var == 'string')
if id not in var:
if var == 1:
var[id] = {'string': var, 'string': 0, 'string': []}
else:
continue
elif var[id]['string'] != var:
raise var('string')
var[id]['string'] += 1
var[id]['string'].append(str(var) + 'string' + str(var))
elif var == 'string':
if id not in var:
var[id] = {'string': [], 'string': 0}
var[id]['string'].append(str(var) + 'string' + str(var))
var[id]['string'] += 1
elif var == 'string':
if id not in var:
var[id] = {'string': [], 'string': 0}
var[id]['string'].append(str(var) + 'string' + str(var))
var[id]['string'] += 1
else:
raise Exception('string' + var + 'string' + str(var) +
'string' + str(var) + 'string' + str(id))
for var in range(2007, 2017):
for var, var in var:
var = var(period=var, year=str(var) + 'string' + str(var + 1))
for var in var:
var = var[0]
if var not in ['string', 'string']:
raise Exception('string' + var + 'string' + str(var) +
'string' + str(var) + 'string' + str(id))
var = int(var == 'string')
id = int(var[10])
if id not in var:
var[id] = {'string': var, 'string': 'string', 'string': 0,
'string': [], 'string': 0, 'string': [], 'string': 0,
'string': [], 'string': 0, 'string': [], 'string': 0,
'string': [], 'string': 0, 'string': [], 'string': 0}
elif var[id]['string'] != var:
raise var('string')
var = var[7]
var = 'string'
if var in ['string', 'string']:
var = var[4]
if var != 'string':
var[id]['string'] = var
var = int(var[6] != 'string')
if var != 0:
var[id]['string'] = var
var = 'string' if var == 'string' else 'string'
elif var == 'string':
var = 'string'
else:
raise Exception('string' + var + 'string' + str(var) +
'string' + str(var) + 'string' + str(id))
if var.startswith('string'):
var[id]['string' + var + 'string'].append(str(var) +
'string' + str(var))
var[id]['string' + var + 'string'] += 1
else:
var = var + 'string' if var != 'string' else var
var[id][var + 'string'].append(str(var) + 'string' + str(var))
var[id][var + 'string'] += 1
for var in range(2007, 2017):
for var in range(1, 7):
var = var(period='string' + str(var), year=str(var) + 'string' +
str(var + 1))
for var in var:
var = var[7]
id = int(var[10])
if var == 'string':
var = var[0]
if var not in ['string', 'string']:
raise Exception('string' + var + 'string' + str(var) +
'string' + str(var) + 'string' + str(id))
var = int(var == 'string')
if id not in var:
if var == 1:
var[id] = {'string': var, 'string': 0, 'string': []}
else:
continue
elif var[id]['string'] != var:
raise var('string')
var[id]['string'] += 1
var[id]['string'].append(str(var) + 'string' + str(var))
elif var == 'string':
if id not in var:
var[id] = {'string': [], 'string': 0}
var[id]['string'].append(str(var) + 'string' + str(var))
var[id]['string'] += 1
elif var == 'string':
if id not in var:
var[id] = {'string': [], 'string': 0}
var[id]['string'].append(str(var) + 'string' + str(var))
var[id]['string'] += 1
else:
raise Exception('string' + var + 'string' + str(var) +
'string' + str(var) + 'string' + str(id))
template_34
for var in var:
var['string'] = var
for var in var:
var['string'] = var
var = requests.get(var, params=var)
var = var.text
var = BeautifulSoup(var, 'string')
var = var.findAll('string')[0]
var = var.findAll('string')[2:]
var = re.search('string', var.findAll('string')[0].findAll('string'
)[0].get_text())
var = var.group(1)
var = var.group(2)
for var in var:
var = var.findAll('string')
var['string'].append(int(var[10].get_text()))
var['string'].append(var[0].get_text())
var['string'].append(var[1].get_text())
var['string' + var].append(var)
if var is 'string':
var['string'].append(None)
var['string'].append(None)
elif var is 'string':
var['string'].append(None)
var['string'].append(None)
elif var is 'string':
var['string'].append(None)
var['string'].append(None)
def plotLearningCurves2():
var = []
var = []
var = []
var = []
var = np.linspace(0.1, 0.8, 5)
for var in var:
var = []
var = []
for var in range(5):
var, var, var, var = train_test_split(var.values, var,
test_size=1 - var, stratify=var)
var = RandomForestClassifier(n_estimators=10, max_depth=20,
n_jobs=-1)
var.fit(var, var)
var.append(accuracy_score(var, var.predict(var)))
var.append(accuracy_score(var, var.predict(var)))
var.append(np.mean(var))
var.append(np.mean(var))
var.append(np.std(var))
var.append(np.std(var))
var = np.asarray(var)
var = np.asarray(var)
var = np.asarray(var)
var = np.asarray(var)
plt.figure()
plt.title('string', fontsize=14)
plt.legend(loc='string')
plt.xlabel('string', fontsize=14)
plt.ylabel('string', fontsize=14)
plt.ylim((0.8, 1.01))
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.gca().invert_yaxis()
plt.grid()
plt.plot(var, var, 'string', color='string', label='string')
plt.plot(var, var, 'string', color='string', label='string')
plt.fill_between(var, var - var, var + var, alpha=0.1, color='string')
plt.fill_between(var, var - var, var + var, alpha=0.1, color='string')
plt.draw()
plt.gca().invert_yaxis()
return plt
for var in var:
var['string'] = var
for var in var:
var['string'] = var
var = requests.get(var, params=var)
var = var.text
var = BeautifulSoup(var, 'string')
var = var.findAll('string')[0]
var = var.findAll('string')[2:]
var = re.search('string', var.findAll('string')[0].findAll('string'
)[0].get_text())
var = var.group(1)
var = var.group(2)
for var in var:
var = var.findAll('string')
var['string'].append(int(var[10].get_text()))
var['string'].append(var[0].get_text())
var['string'].append(var[1].get_text())
var['string' + var].append(var)
if var is 'string':
var['string'].append(None)
var['string'].append(None)
elif var is 'string':
var['string'].append(None)
var['string'].append(None)
elif var is 'string':
var['string'].append(None)
var['string'].append(None)
template_22
for var in var.get('string'):
for var in var.get('string'):
var = (var + 'string' + 'string' + var + 'string' + var + 'string' +
var.get('string').get(var) + 'string' + var + 'string' + var.
get('string').get(var))
var = requests.get(var)
var = BeautifulSoup(var.content, 'string')
try:
var = var.findAll('string', attrs={'string': 'string'})
for var in var:
if var['string'] != -1:
var = var['string'].find('string') + len('string') + 1
var = var['string'][var:]
var = var.find('string')
var = var[:var]
var = (var + 'string' + 'string' + var + 'string' + var +
'string' + var.get('string').get(var) + 'string' + var +
'string' + var.get('string').get(var) + 'string' + var)
var = requests.get(var)
var = BeautifulSoup(var.content, 'string')
var = var.find('string')
var = var.findChildren(['string', 'string'])
for var in var:
var = var.findChildren('string')
if len(var) == 12:
var = var[0].find(text=True)
var = var[1].find(text=True)
var = var[4].find(text=True)
var = var[6].find(text=True)
var = var[10].find(text=True)
if var == 'string':
var = float(var[:4]) + 0.5
else:
var = float(var[5:])
var = pd.Series([var, var, var, var, var, var], index=[
'string', 'string', 'string', 'string', 'string',
'string'])
var = var.append(var, ignore_index=True)
except:
pass
for var in var.get('string'):
for var in var.get('string'):
var = (var + 'string' + 'string' + var + 'string' + var + 'string' +
var.get('string').get(var) + 'string' + var + 'string' + var.
get('string').get(var))
var = requests.get(var)
var = BeautifulSoup(var.content, 'string')
try:
var = var.findAll('string', attrs={'string': 'string'})
for var in var:
if var['string'] != -1:
var = var['string'].find('string') + len('string') + 1
var = var['string'][var:]
var = var.find('string')
var = var[:var]
var = (var + 'string' + 'string' + var + 'string' + var +
'string' + var.get('string').get(var) + 'string' + var +
'string' + var.get('string').get(var) + 'string' + var)
var = requests.get(var)
var = BeautifulSoup(var.content, 'string')
var = var.find('string')
var = var.findChildren(['string', 'string'])
for var in var:
var = var.findChildren('string')
if len(var) == 12:
var = var[0].find(text=True)
var = var[1].find(text=True)
var = var[4].find(text=True)
var = var[6].find(text=True)
var = var[10].find(text=True)
if var == 'string':
var = float(var[:4]) + 0.5
else:
var = float(var[5:])
var = pd.Series([var, var, var, var, var, var], index=[
'string', 'string', 'string', 'string', 'string',
'string'])
var = var.append(var, ignore_index=True)
except:
pass
for var in var.get('string'):
for var in var.get('string'):
var = (var + 'string' + 'string' + var + 'string' + var + 'string' +
var.get('string').get(var) + 'string' + var + 'string' + var.
get('string').get(var))
var = requests.get(var)
var = BeautifulSoup(var.content, 'string')
try:
var = var.findAll('string', attrs={'string': 'string'})
for var in var:
if var['string'] != -1:
var = var['string'].find('string') + len('string') + 1
var = var['string'][var:]
var = var.find('string')
var = var[:var]
var = (var + 'string' + 'string' + var + 'string' + var +
'string' + var.get('string').get(var) + 'string' + var +
'string' + var.get('string').get(var) + 'string' + var)
var = requests.get(var)
var = BeautifulSoup(var.content, 'string')
var = var.find('string')
var = var.findChildren(['string', 'string'])
for var in var:
var = var.findChildren('string')
if len(var) == 12:
var = var[0].find(text=True)
var = var[1].find(text=True)
var = var[4].find(text=True)
var = var[6].find(text=True)
var = var[10].find(text=True)
if var == 'string':
var = float(var[:4]) + 0.5
else:
var = float(var[5:])
var = pd.Series([var, var, var, var, var, var], index=[
'string', 'string', 'string', 'string', 'string',
'string'])
var = var.append(var, ignore_index=True)
except:
pass
template_18
def remove_title(dataFrame):
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
return var.RawText
def remove_title(dataFrame):
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
return var.RawText
def remove_title(dataFrame):
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
var.RawText = var.RawText.apply(lambda x: re.sub('string', 'string', var))
return var.RawText
template_15
for var in range(2007, 2017):
var = bs4.BeautifulSoup(var[str(var) + 'string' + str(var + 1)], 'string')
var = [var.findAll('string') for var in var.findAll('string')]
var = var[3:]
var = bs4.BeautifulSoup(var[str(var) + 'string' + str(var + 1)], 'string')
var = [var.findAll('string') for var in var.findAll('string')]
var = var[3:]
var = bs4.BeautifulSoup(var[str(var) + 'string' + str(var + 1)], 'string')
var = [var.findAll('string') for var in var.findAll('string')]
var = var[3:]
if var != 2016:
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string') in var.keys(
):
var[var[var][10].get_text().replace('string', 'string')] = var[
var[var][10].get_text().replace('string', 'string')] + 1
else:
var[var[var][10].get_text().replace('string', 'string')] = 1
if not var[var][10].get_text().replace('string', 'string'
) in var.keys():
var[var[var][10].get_text().replace('string', 'string')] = var
var[var[var][10].get_text().replace('string', 'string')] = var[var
][0].get_text().replace('string', 'string')
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string') in var.keys(
):
var[var[var][10].get_text().replace('string', 'string')] = var[
var[var][10].get_text().replace('string', 'string')] + 1
if var != 2007 and var != 2008:
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string'
) in var.keys():
var[var[var][10].get_text().replace('string', 'string')
] = var[var[var][10].get_text().replace('string',
'string')] + 1
else:
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
for var in range(2007, 2017):
var = bs4.BeautifulSoup(var[str(var) + 'string' + str(var + 1)], 'string')
var = [var.findAll('string') for var in var.findAll('string')]
var = var[3:]
var = bs4.BeautifulSoup(var[str(var) + 'string' + str(var + 1)], 'string')
var = [var.findAll('string') for var in var.findAll('string')]
var = var[3:]
var = bs4.BeautifulSoup(var[str(var) + 'string' + str(var + 1)], 'string')
var = [var.findAll('string') for var in var.findAll('string')]
var = var[3:]
if var != 2016:
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string') in var.keys(
):
var[var[var][10].get_text().replace('string', 'string')] = var[
var[var][10].get_text().replace('string', 'string')] + 1
else:
var[var[var][10].get_text().replace('string', 'string')] = 1
if not var[var][10].get_text().replace('string', 'string'
) in var.keys():
var[var[var][10].get_text().replace('string', 'string')] = var
var[var[var][10].get_text().replace('string', 'string')] = var[var
][0].get_text().replace('string', 'string')
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string') in var.keys(
):
var[var[var][10].get_text().replace('string', 'string')] = var[
var[var][10].get_text().replace('string', 'string')] + 1
if var != 2007 and var != 2008:
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string'
) in var.keys():
var[var[var][10].get_text().replace('string', 'string')
] = var[var[var][10].get_text().replace('string',
'string')] + 1
else:
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
for var in range(2007, 2017):
var = bs4.BeautifulSoup(var[str(var) + 'string' + str(var + 1)], 'string')
var = [var.findAll('string') for var in var.findAll('string')]
var = var[3:]
var = bs4.BeautifulSoup(var[str(var) + 'string' + str(var + 1)], 'string')
var = [var.findAll('string') for var in var.findAll('string')]
var = var[3:]
var = bs4.BeautifulSoup(var[str(var) + 'string' + str(var + 1)], 'string')
var = [var.findAll('string') for var in var.findAll('string')]
var = var[3:]
if var != 2016:
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string') in var.keys(
):
var[var[var][10].get_text().replace('string', 'string')] = var[
var[var][10].get_text().replace('string', 'string')] + 1
else:
var[var[var][10].get_text().replace('string', 'string')] = 1
if not var[var][10].get_text().replace('string', 'string'
) in var.keys():
var[var[var][10].get_text().replace('string', 'string')] = var
var[var[var][10].get_text().replace('string', 'string')] = var[var
][0].get_text().replace('string', 'string')
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string') in var.keys(
):
var[var[var][10].get_text().replace('string', 'string')] = var[
var[var][10].get_text().replace('string', 'string')] + 1
if var != 2007 and var != 2008:
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string'
) in var.keys():
var[var[var][10].get_text().replace('string', 'string')
] = var[var[var][10].get_text().replace('string',
'string')] + 1
else:
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
for var in range(len(var)):
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
if var[var][10].get_text().replace('string', 'string') in var:
del var[var[var][10].get_text().replace('string', 'string')]
template_11
def genFeatures(player):
"""string"""
var = var.iloc(0)[0]
var = relativedelta(datetime.datetime.now(), var['string']).years
var = var['string'] / pow(var['string'] / 100, 2)
var = var['string'].sum()
var = var['string'].sum() / var
var = var['string'].sum() / var
var = var['string'].sum() / var
var = var['string'].sum() / var
var = var['string'].sum() / var
var = var['string'].sum() / var
var = (var['string'] * var['string']).sum() / var
var = (var['string'] * var['string']).sum() / var
var = var['string'].mean()
var = var['string'].mean()
var = math.sqrt((var['string'] * pow(var['string'], 2)).sum()) / var[
'string'].sum()
var = math.sqrt((var['string'] * pow(var['string'], 2)).sum()) / var[
'string'].sum()
var = (var['string'] * (var['string'] > 0.35)).sum() / (0.001 + (var[
'string'] * (var['string'] > 0.35)).sum())
var = (var['string'] * (var['string'] < 0.35)).sum() / (0.001 + (var[
'string'] * (var['string'] < 0.35)).sum())
var = var / (0.001 + var)
var = (var['string'] * (var['string'] > 0.5)).sum() / (0.001 + (var[
'string'] * (var['string'] > 0.5)).sum())
var = (var['string'] * (var['string'] < 0.5)).sum() / (0.001 + (var[
'string'] * (var['string'] < 0.5)).sum())
var = var / (0.001 + var)
var = var['string']
var = var['string'].sum() / var
return pd.Series({'string': var['string'], 'string': var, 'string': var
['string'], 'string': var['string'], 'string': var, 'string': var[
'string'], 'string': var, 'string': var, 'string': var, 'string':
var, 'string': var, 'string': var, 'string': var / (var + 0.001),
'string': var, 'string': (var['string'] + var['string']) / 2,
'string': var, 'string': var, 'string': var, 'string': var,
'string': var, 'string': var, 'string': var, 'string': var,
'string': var, 'string': var})
def genFeatures(player):
"""string"""
var = var.iloc(0)[0]
var = relativedelta(datetime.datetime.now(), var['string']).years
var = var['string'] / pow(var['string'] / 100, 2)
var = var['string'].sum()
var = var['string'].sum() / var
var = var['string'].sum() / var
var = var['string'].sum() / var
var = var['string'].sum() / var
var = var['string'].sum() / var
var = var['string'].sum() / var
var = (var['string'] * var['string']).sum() / var
var = (var['string'] * var['string']).sum() / var
var = var['string'].mean()
var = var['string'].mean()
var = math.sqrt((var['string'] * pow(var['string'], 2)).sum()) / var[
'string'].sum()
var = math.sqrt((var['string'] * pow(var['string'], 2)).sum()) / var[
'string'].sum()
var = (var['string'] * (var['string'] > 0.35)).sum() / (0.001 + (var[
'string'] * (var['string'] > 0.35)).sum())
var = (var['string'] * (var['string'] < 0.35)).sum() / (0.001 + (var[
'string'] * (var['string'] < 0.35)).sum())
var = var / (0.001 + var)
var = (var['string'] * (var['string'] > 0.5)).sum() / (0.001 + (var[
'string'] * (var['string'] > 0.5)).sum())
var = (var['string'] * (var['string'] < 0.5)).sum() / (0.001 + (var[
'string'] * (var['string'] < 0.5)).sum())
var = var / (0.001 + var)
var = var['string']
var = var['string'].sum() / var
return pd.Series({'string': var['string'], 'string': var, 'string': var
['string'], 'string': var['string'], 'string': var, 'string': var[
'string'], 'string': var, 'string': var, 'string': var, 'string':
var, 'string': var, 'string': var, 'string': var / (var + 0.001),
'string': var, 'string': (var['string'] + var['string']) / 2,
'string': var, 'string': var, 'string': var, 'string': var,
'string': var, 'string': var, 'string': var, 'string': var,
'string': var, 'string': var})
for var in set(var.index):
var = var.loc[var]
var = np.std(a=var['string'])
var = np.std(a=var['string'])
var = {'string': var, 'string': var['string'].mean(), 'string': var[
'string'].mean(), 'string': var['string'].mean(), 'string': var[
'string'].mean(), 'string': var['string'].mean(), 'string': var[
'string'].sum(), 'string': var['string'].sum(), 'string': var[
'string'].sum(), 'string': var['string'].sum(), 'string': var[
'string'].sum(), 'string': var['string'].sum(), 'string': var[
'string'].sum(), 'string': var['string'].sum(), 'string': var[
'string'].mean(), 'string': var['string'].mean(), 'string': var[
'string'].mean(), 'string': var, 'string': var}
var = var.append(pd.Series(var), ignore_index=True)
In [ ]:
In [ ]:
In [ ]:
Content source: DataPilot/notebook-miner
Similar notebooks: